msg_tool\scripts\bgi/
script.rs

1//! Buriko General Interpreter/Ethornell Script
2use super::parser::*;
3use crate::ext::io::*;
4use crate::scripts::base::*;
5use crate::types::*;
6use crate::utils::encoding::{decode_to_string, encode_string};
7use anyhow::Result;
8use fancy_regex::Regex;
9use lazy_static::lazy_static;
10use std::collections::{BTreeMap, HashMap};
11
/// Builder for BGI scripts.
///
/// The builder carries no state, so [`BGIScriptBuilder::new`] and
/// [`BGIScriptBuilder::default`] produce equivalent values.
#[derive(Debug, Default)]
pub struct BGIScriptBuilder {}

impl BGIScriptBuilder {
    /// Creates a new instance of `BGIScriptBuilder`.
    pub fn new() -> Self {
        Self::default()
    }
}
22
23impl ScriptBuilder for BGIScriptBuilder {
24    fn default_encoding(&self) -> Encoding {
25        #[cfg(not(windows))]
26        return Encoding::Cp932;
27        #[cfg(windows)]
28        // Use Windows API first, because encoding-rs does not support PRIVATE USE AREA characters
29        return Encoding::CodePage(932);
30    }
31
32    fn build_script(
33        &self,
34        buf: Vec<u8>,
35        _filename: &str,
36        encoding: Encoding,
37        _archive_encoding: Encoding,
38        config: &ExtraConfig,
39        _archive: Option<&Box<dyn Script>>,
40    ) -> Result<Box<dyn Script>> {
41        Ok(Box::new(BGIScript::new(buf, encoding, config)?))
42    }
43
44    fn extensions(&self) -> &'static [&'static str] {
45        &[]
46    }
47
48    fn script_type(&self) -> &'static ScriptType {
49        &ScriptType::BGI
50    }
51
52    fn is_this_format(&self, _filename: &str, buf: &[u8], buf_len: usize) -> Option<u8> {
53        if buf_len > 28 && buf.starts_with(b"BurikoCompiledScriptVer1.00\0") {
54            return Some(255);
55        }
56        None
57    }
58}
59
/// BGI Script
///
/// Holds the raw script bytes together with the string entries found by
/// disassembling them, plus the options that control how messages are imported.
pub struct BGIScript {
    /// Raw script bytes.
    data: MemReader,
    /// Encoding used to decode strings read from `data`.
    encoding: Encoding,
    /// String entries collected by the parser during disassembly.
    strings: Vec<BGIString>,
    /// `true` when the data starts with the `BurikoCompiledScriptVer1.00\0` magic.
    is_v1: bool,
    /// `true` when the strings were produced by `V1Parser` (either because of the
    /// magic header, or because `V0Parser` failed on header-less data).
    is_v1_instr: bool,
    /// Base added to a string's `address` to get its byte position in `data`;
    /// taken from the V1 parser, `0` for header-less scripts.
    offset: usize,
    /// Copied from `config.bgi_import_duplicate`; when `true`, import writes a
    /// fresh copy of the text for every string entry instead of sharing copies.
    import_duplicate: bool,
    /// Negation of `config.bgi_disable_append`; when `true`, import first copies
    /// the whole original script and then adds new strings after it.
    append: bool,
}
71
72impl std::fmt::Debug for BGIScript {
73    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
74        f.debug_struct("BGIScript")
75            .field("encoding", &self.encoding)
76            .finish_non_exhaustive()
77    }
78}
79
80impl BGIScript {
81    /// Creates a new instance of `BGIScript` from a buffer.
82    ///
83    /// * `data` - The buffer containing the script data.
84    /// * `encoding` - The encoding of the script.
85    /// * `config` - Extra configuration options.
86    pub fn new(data: Vec<u8>, encoding: Encoding, config: &ExtraConfig) -> Result<Self> {
87        let data = MemReader::new(data);
88        if data.data.starts_with(b"BurikoCompiledScriptVer1.00\0") {
89            let mut parser = V1Parser::new(data.to_ref(), encoding)?;
90            parser.disassemble()?;
91            let strings = parser.strings.clone();
92            let offset = parser.offset;
93            Ok(Self {
94                data,
95                encoding,
96                strings,
97                is_v1: true,
98                is_v1_instr: true,
99                offset,
100                import_duplicate: config.bgi_import_duplicate,
101                append: !config.bgi_disable_append,
102            })
103        } else {
104            let mut is_v1_instr = false;
105            let strings = {
106                let mut parser = V0Parser::new(data.to_ref());
107                match parser.disassemble() {
108                    Ok(_) => parser.strings,
109                    Err(_) => {
110                        let mut parser = V1Parser::new(data.to_ref(), encoding)?;
111                        parser.disassemble()?;
112                        is_v1_instr = true;
113                        parser.strings
114                    }
115                }
116            };
117            Ok(Self {
118                data,
119                encoding,
120                strings,
121                is_v1: false,
122                is_v1_instr,
123                offset: 0,
124                import_duplicate: config.bgi_import_duplicate,
125                append: !config.bgi_disable_append,
126            })
127        }
128    }
129
130    fn read_string(&self, offset: usize) -> Result<String> {
131        let start = self.offset + offset;
132        let string_data = self.data.cpeek_cstring_at(start as u64)?;
133        // sometimes string has private use area characters, so we disable strict checking
134        let string = decode_to_string(self.encoding, string_data.as_bytes(), false)?;
135        Ok(string)
136    }
137
138    fn output_with_ruby(str: &mut String, ruby: &mut Vec<String>) -> Result<()> {
139        if ruby.is_empty() {
140            return Ok(());
141        }
142        if ruby.len() % 2 != 0 {
143            return Err(anyhow::anyhow!("Ruby strings count is not even."));
144        }
145        for i in (0..ruby.len()).step_by(2) {
146            let ruby_str = &ruby[i];
147            let ruby_text = &ruby[i + 1];
148            if ruby_str.is_empty() || ruby_text.is_empty() {
149                continue;
150            }
151            *str = str.replace(ruby_str, &format!("<r{ruby_text}>{ruby_str}</r>"));
152        }
153        ruby.clear();
154        Ok(())
155    }
156}
157
158impl Script for BGIScript {
159    fn default_output_script_type(&self) -> OutputScriptType {
160        OutputScriptType::Json
161    }
162
163    fn default_format_type(&self) -> FormatOptions {
164        if self.is_v1_instr {
165            FormatOptions::None
166        } else {
167            FormatOptions::Fixed {
168                length: 32,
169                keep_original: false,
170            }
171        }
172    }
173
174    fn extract_messages(&self) -> Result<Vec<Message>> {
175        let mut messages = Vec::new();
176        let mut name = None;
177        let mut ruby = Vec::new();
178        for bgi_string in &self.strings {
179            match bgi_string.typ {
180                BGIStringType::Name => {
181                    name = Some(self.read_string(bgi_string.address)?);
182                }
183                BGIStringType::Message => {
184                    let mut message = self.read_string(bgi_string.address)?;
185                    if !ruby.is_empty() {
186                        Self::output_with_ruby(&mut message, &mut ruby)?;
187                    }
188                    messages.push(Message {
189                        name: name.take(),
190                        message: message,
191                    });
192                }
193                BGIStringType::Ruby => {
194                    let ruby_str = self.read_string(bgi_string.address)?;
195                    ruby.push(ruby_str);
196                }
197                _ => {}
198            }
199        }
200        Ok(messages)
201    }
202
203    fn import_messages<'a>(
204        &'a self,
205        mut messages: Vec<Message>,
206        mut file: Box<dyn WriteSeek + 'a>,
207        _filename: &str,
208        encoding: Encoding,
209        replacement: Option<&'a ReplacementTable>,
210    ) -> Result<()> {
211        if !self.import_duplicate {
212            let mut used = HashMap::new();
213            let mut extra = HashMap::new();
214            let mut mes = messages.iter_mut();
215            let mut cur_mes = mes.next();
216            let mut old_offset = 0;
217            let mut new_offset = 0;
218            let mut rubys = Vec::new();
219            let mut parsed_ruby = false;
220            if self.append {
221                file.write_all(&self.data.data)?;
222                new_offset = self.data.data.len();
223            }
224            for curs in &self.strings {
225                if !curs.is_internal() {
226                    if cur_mes.is_none() {
227                        cur_mes = mes.next();
228                    }
229                }
230                if used.contains_key(&curs.address) && curs.is_internal() {
231                    let (_, new_address) = used.get(&curs.address).unwrap();
232                    file.write_u32_at(curs.offset as u64, *new_address as u32)?;
233                    continue;
234                }
235                let nmes = match curs.typ {
236                    BGIStringType::Internal => self.read_string(curs.address)?,
237                    BGIStringType::Ruby => {
238                        if !self.is_v1 && self.is_v1_instr {
239                            if rubys.is_empty() {
240                                if parsed_ruby {
241                                    String::from("<")
242                                } else {
243                                    rubys = match &mut cur_mes {
244                                        Some(m) => parse_ruby_from_text(&mut m.message)?,
245                                        None => return Err(anyhow::anyhow!("No enough messages.")),
246                                    };
247                                    parsed_ruby = true;
248                                    if rubys.is_empty() {
249                                        String::from("<")
250                                    } else {
251                                        let ruby_str = rubys.remove(0);
252                                        ruby_str
253                                    }
254                                }
255                            } else {
256                                rubys.remove(0)
257                            }
258                        } else {
259                            self.read_string(curs.address)?
260                        }
261                    }
262                    BGIStringType::Name => match &cur_mes {
263                        Some(m) => {
264                            if let Some(name) = &m.name {
265                                let mut name = name.clone();
266                                if let Some(replacement) = replacement {
267                                    for (key, value) in replacement.map.iter() {
268                                        name = name.replace(key, value);
269                                    }
270                                }
271                                name
272                            } else {
273                                return Err(anyhow::anyhow!("Name is missing for message."));
274                            }
275                        }
276                        None => return Err(anyhow::anyhow!("No enough messages.")),
277                    },
278                    BGIStringType::Message => {
279                        if !rubys.is_empty() {
280                            eprintln!("Warning: Some ruby strings are unused: {:?}", rubys);
281                            crate::COUNTER.inc_warning();
282                            rubys.clear();
283                        }
284                        parsed_ruby = false;
285                        let mes = match &cur_mes {
286                            Some(m) => {
287                                let mut message = m.message.clone();
288                                if let Some(replacement) = replacement {
289                                    for (key, value) in replacement.map.iter() {
290                                        message = message.replace(key, value);
291                                    }
292                                }
293                                message
294                            }
295                            None => return Err(anyhow::anyhow!("No enough messages.")),
296                        };
297                        cur_mes.take();
298                        mes
299                    }
300                };
301                let in_used = match used.get(&curs.address) {
302                    Some((s, address)) => {
303                        if s == &nmes {
304                            file.write_u32_at(curs.offset as u64, *address as u32)?;
305                            continue;
306                        }
307                        if let Some(address) = extra.get(&nmes) {
308                            file.write_u32_at(curs.offset as u64, *address as u32)?;
309                            continue;
310                        }
311                        true
312                    }
313                    None => false,
314                };
315                let bgi_str_old_offset = curs.address + self.offset;
316                if !self.append && old_offset < bgi_str_old_offset {
317                    file.write_all(&self.data.data[old_offset..bgi_str_old_offset])?;
318                    new_offset += bgi_str_old_offset - old_offset;
319                    old_offset = bgi_str_old_offset;
320                }
321                let old_str_len = self
322                    .data
323                    .cpeek_cstring_at(bgi_str_old_offset as u64)?
324                    .as_bytes_with_nul()
325                    .len();
326                let nmess = encode_string(encoding, &nmes, false)?;
327                let write_to_original = self.append && !in_used && nmess.len() + 1 <= old_str_len;
328                if write_to_original {
329                    file.write_all_at(bgi_str_old_offset as u64, &nmess)?;
330                    file.write_u8_at(bgi_str_old_offset as u64 + nmess.len() as u64, 0)?; // null terminator
331                } else {
332                    file.write_all(&nmess)?;
333                    file.write_u8(0)?; // null terminator
334                }
335                let new_address = if write_to_original {
336                    bgi_str_old_offset - self.offset
337                } else {
338                    new_offset - self.offset
339                };
340                file.write_u32_at(curs.offset as u64, new_address as u32)?;
341                if in_used {
342                    extra.insert(nmes, new_address);
343                } else {
344                    used.insert(curs.address, (nmes, new_address));
345                }
346                old_offset += old_str_len;
347                if !write_to_original {
348                    new_offset += nmess.len() + 1; // +1 for null terminator
349                }
350            }
351            if cur_mes.is_some() || mes.next().is_some() {
352                return Err(anyhow::anyhow!("Some messages were not processed."));
353            }
354            if !self.append && old_offset < self.data.data.len() {
355                file.write_all(&self.data.data[old_offset..])?;
356            }
357            return Ok(());
358        }
359        let mut mes = messages.iter_mut();
360        let mut cur_mes = None;
361        let mut strs = self.strings.iter();
362        let mut nstrs = Vec::new();
363        let mut cur_str = strs.next();
364        let mut old_offset = 0;
365        let mut new_offset = 0;
366        let mut rubys = Vec::new();
367        let mut parsed_ruby = false;
368        if self.append {
369            file.write_all(&self.data.data)?;
370            new_offset = self.data.data.len();
371        }
372        while let Some(curs) = cur_str {
373            if !curs.is_internal() {
374                if cur_mes.is_none() {
375                    cur_mes = mes.next();
376                }
377            }
378            let bgi_str_old_offset = curs.address + self.offset;
379            if !self.append && old_offset < bgi_str_old_offset {
380                file.write_all(&self.data.data[old_offset..bgi_str_old_offset])?;
381                new_offset += bgi_str_old_offset - old_offset;
382                old_offset = bgi_str_old_offset;
383            }
384            let old_str_len = self
385                .data
386                .cpeek_cstring_at((curs.address + self.offset) as u64)?
387                .as_bytes_with_nul()
388                .len();
389            let nmes = match curs.typ {
390                BGIStringType::Internal => self.read_string(curs.address)?,
391                BGIStringType::Ruby => {
392                    if !self.is_v1 && self.is_v1_instr {
393                        if rubys.is_empty() {
394                            if parsed_ruby {
395                                String::from("<")
396                            } else {
397                                rubys = match &mut cur_mes {
398                                    Some(m) => parse_ruby_from_text(&mut m.message)?,
399                                    None => return Err(anyhow::anyhow!("No enough messages.")),
400                                };
401                                parsed_ruby = true;
402                                if rubys.is_empty() {
403                                    String::from("<")
404                                } else {
405                                    let ruby_str = rubys.remove(0);
406                                    ruby_str
407                                }
408                            }
409                        } else {
410                            rubys.remove(0)
411                        }
412                    } else {
413                        self.read_string(curs.address)?
414                    }
415                }
416                BGIStringType::Name => match &cur_mes {
417                    Some(m) => {
418                        if let Some(name) = &m.name {
419                            let mut name = name.clone();
420                            if let Some(replacement) = replacement {
421                                for (key, value) in replacement.map.iter() {
422                                    name = name.replace(key, value);
423                                }
424                            }
425                            name
426                        } else {
427                            return Err(anyhow::anyhow!("Name is missing for message."));
428                        }
429                    }
430                    None => return Err(anyhow::anyhow!("No enough messages.")),
431                },
432                BGIStringType::Message => {
433                    if !rubys.is_empty() {
434                        eprintln!("Warning: Some ruby strings are unused: {:?}", rubys);
435                        crate::COUNTER.inc_warning();
436                        rubys.clear();
437                    }
438                    parsed_ruby = false;
439                    let mes = match &cur_mes {
440                        Some(m) => {
441                            let mut message = m.message.clone();
442                            if let Some(replacement) = replacement {
443                                for (key, value) in replacement.map.iter() {
444                                    message = message.replace(key, value);
445                                }
446                            }
447                            message
448                        }
449                        None => return Err(anyhow::anyhow!("No enough messages.")),
450                    };
451                    cur_mes.take();
452                    mes
453                }
454            };
455            let nmes = encode_string(encoding, &nmes, false)?;
456            file.write_all(&nmes)?;
457            file.write_u8(0)?;
458            let new_str_len = nmes.len() + 1; // +1 for null terminator
459            let new_address = new_offset - self.offset;
460            nstrs.push(BGIString {
461                offset: curs.offset,
462                address: new_address,
463                typ: curs.typ.clone(),
464            });
465            old_offset += old_str_len;
466            new_offset += new_str_len;
467            cur_str = strs.next();
468        }
469        if cur_mes.is_some() || mes.next().is_some() {
470            return Err(anyhow::anyhow!("Some messages were not processed."));
471        }
472        for str in nstrs {
473            file.write_u32_at(str.offset as u64, str.address as u32)?;
474        }
475        if !self.append && old_offset < self.data.data.len() {
476            file.write_all(&self.data.data[old_offset..])?;
477        }
478        Ok(())
479    }
480}
481
lazy_static! {
    // Matches inline ruby markup of the form `<rTEXT>BASE</r>`.
    // Capture group 1 is the text directly after `<r` (up to `>`), and
    // capture group 2 is the text enclosed by the tag (up to the next `<`).
    static ref RUBY_REGEX: Regex = Regex::new(r"<r([^>]+)>([^<]+)</r>").unwrap();
}
485
486fn parse_ruby_from_text(text: &mut String) -> Result<Vec<String>> {
487    let mut map = BTreeMap::new();
488    for i in RUBY_REGEX.captures_iter(&text) {
489        let i = i?;
490        let ruby_text = i.get(1).map_or("", |m| m.as_str());
491        let ruby_str = i.get(2).map_or("", |m| m.as_str());
492        if !ruby_text.is_empty() && !ruby_str.is_empty() {
493            map.insert(ruby_str.to_owned(), ruby_text.to_owned());
494        }
495    }
496    let mut result = Vec::new();
497    for (ruby_str, ruby_text) in map {
498        *text = text.replace(&format!("<r{ruby_text}>{ruby_str}</r>"), &ruby_str);
499        result.push(ruby_str);
500        result.push(ruby_text);
501    }
502    Ok(result)
503}
504
/// Checks that ruby markup is stripped from the text and that the extracted
/// pairs come back ordered by the enclosed text.
#[test]
fn test_parse_ruby_from_text() {
    let mut text =
        "This is a test <rRubyText>RubyString</r> and <rAnotherText>AnotherRuby</r>.".to_string();
    let ruby = parse_ruby_from_text(&mut text).unwrap();
    assert_eq!(text, "This is a test RubyString and AnotherRuby.");
    let expected: Vec<String> = ["AnotherRuby", "AnotherText", "RubyString", "RubyText"]
        .iter()
        .map(|s| s.to_string())
        .collect();
    assert_eq!(ruby, expected);
}
520}